library(tidyverse)
library(mice)
library(skimr)
library(corrplot)
library(car)
library(ISLR)
library(ggplot2)
library(gridExtra)
library(SamplingStrata)
library(rbin)
library(leaps)
library(dplyr)
library(ggplot2)
library(geosphere)
library(broom)
library(plyr)
library(devtools)
# Print numbers in fixed notation rather than scientific notation.
options(scipen = 999)
# Load the 2019 NYC Airbnb listings.
# stringsAsFactors = TRUE is set explicitly: R >= 4.0.0 reads text columns as
# character by default, but later steps (colouring pairs() by neighbourhood,
# plotting price against the categorical columns) need factors, which is also
# what the recorded str() output shows.
nycraw <- read.csv(
  "https://raw.githubusercontent.com/JaclynCoate/6372_Project_1/master/AB_NYC_2019.csv",
  header = TRUE,
  strip.white = TRUE,
  stringsAsFactors = TRUE
)
head(nycraw)
## id name host_id
## 1 2539 Clean & quiet apt home by the park 2787
## 2 2595 Skylit Midtown Castle 2845
## 3 3831 Cozy Entire Floor of Brownstone 4869
## 4 5022 Entire Apt: Spacious Studio/Loft by central park 7192
## 5 5099 Large Cozy 1 BR Apartment In Midtown East 7322
## 6 5121 BlissArtsSpace! 7356
## host_name neighbourhood_group neighbourhood latitude longitude
## 1 John Brooklyn Kensington 40.64749 -73.97237
## 2 Jennifer Manhattan Midtown 40.75362 -73.98377
## 3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976
## 4 Laura Manhattan East Harlem 40.79851 -73.94399
## 5 Chris Manhattan Murray Hill 40.74767 -73.97500
## 6 Garon Brooklyn Bedford-Stuyvesant 40.68688 -73.95596
## room_type price minimum_nights number_of_reviews last_review
## 1 Private room 149 1 9 10/19/18
## 2 Entire home/apt 225 1 45 5/21/19
## 3 Entire home/apt 89 1 270 7/5/19
## 4 Entire home/apt 80 10 9 11/19/18
## 5 Entire home/apt 200 3 74 6/22/19
## 6 Private room 60 45 49 10/5/17
## reviews_per_month calculated_host_listings_count availability_365
## 1 0.21 6 365
## 2 0.38 2 355
## 3 4.64 1 194
## 4 0.10 1 0
## 5 0.59 1 129
## 6 0.40 1 0
# Inspect the dimensions and the type of every column in the raw data.
str(nycraw)
## 'data.frame': 34464 obs. of 16 variables:
## $ id : int 2539 2595 3831 5022 5099 5121 5178 5203 5238 5295 ...
## $ name : Factor w/ 34000 levels ""," Private 1 bdrm Lefferts Gr, BK apt",..: 8990 27105 11178 13776 17775 5839 17808 11099 12643 3931 ...
## $ host_id : int 2787 2845 4869 7192 7322 7356 8967 7490 7549 7702 ...
## $ host_name : Factor w/ 9124 levels ""," Valéria",..: 4017 3840 4984 4754 1544 2840 7735 5519 958 4836 ...
## $ neighbourhood_group : Factor w/ 5 levels "Bronx","Brooklyn",..: 2 3 2 3 3 2 3 3 3 3 ...
## $ neighbourhood : Factor w/ 218 levels "Allerton","Arden Heights",..: 108 127 42 62 137 14 95 201 36 201 ...
## $ latitude : num 40.6 40.8 40.7 40.8 40.7 ...
## $ longitude : num -74 -74 -74 -73.9 -74 ...
## $ room_type : Factor w/ 3 levels "Entire home/apt",..: 2 1 1 1 1 2 2 2 1 1 ...
## $ price : int 149 225 89 80 200 60 79 79 150 135 ...
## $ minimum_nights : int 1 1 1 10 3 45 2 2 1 5 ...
## $ number_of_reviews : int 9 45 270 9 74 49 430 118 160 53 ...
## $ last_review : Factor w/ 908 levels "1/1/17","1/1/18",..: 113 575 775 175 671 144 677 745 716 671 ...
## $ reviews_per_month : num 0.21 0.38 4.64 0.1 0.59 0.4 3.47 0.99 1.33 0.43 ...
## $ calculated_host_listings_count: int 6 2 1 1 1 1 1 1 4 1 ...
## $ availability_365 : int 365 355 194 0 129 0 220 0 188 6 ...
# Creating a new variable, tsquare_distance: the great-circle distance from
# each listing to Times Square, in miles.
# geosphere::distHaversine() takes points as (longitude, latitude) in signed
# degrees and returns metres; dividing by 1609.344 converts metres to miles.
# The call is vectorised: a two-column matrix of listing coordinates is
# compared against the single Times Square point.
# NOTE: the "##" console output recorded in this file was produced by an
# earlier version that passed the points as (latitude, -longitude), which
# understated the distances (e.g. about 2.3 instead of about 7.7 miles for
# the first listing); values derived from tsquare_distance will differ.
times_square <- c(-73.9855, 40.7580)
nycraw$tsquare_distance <- distHaversine(
  cbind(nycraw$longitude, nycraw$latitude),
  times_square
) / 1609.344
head(nycraw)
## id name host_id
## 1 2539 Clean & quiet apt home by the park 2787
## 2 2595 Skylit Midtown Castle 2845
## 3 3831 Cozy Entire Floor of Brownstone 4869
## 4 5022 Entire Apt: Spacious Studio/Loft by central park 7192
## 5 5099 Large Cozy 1 BR Apartment In Midtown East 7322
## 6 5121 BlissArtsSpace! 7356
## host_name neighbourhood_group neighbourhood latitude longitude
## 1 John Brooklyn Kensington 40.64749 -73.97237
## 2 Jennifer Manhattan Midtown 40.75362 -73.98377
## 3 LisaRoxanne Brooklyn Clinton Hill 40.68514 -73.95976
## 4 Laura Manhattan East Harlem 40.79851 -73.94399
## 5 Chris Manhattan Murray Hill 40.74767 -73.97500
## 6 Garon Brooklyn Bedford-Stuyvesant 40.68688 -73.95596
## room_type price minimum_nights number_of_reviews last_review
## 1 Private room 149 1 9 10/19/18
## 2 Entire home/apt 225 1 45 5/21/19
## 3 Entire home/apt 89 1 270 7/5/19
## 4 Entire home/apt 80 10 9 11/19/18
## 5 Entire home/apt 200 3 74 6/22/19
## 6 Private room 60 45 49 10/5/17
## reviews_per_month calculated_host_listings_count availability_365
## 1 0.21 6 365
## 2 0.38 2 355
## 3 4.64 1 194
## 4 0.10 1 0
## 5 0.59 1 129
## 6 0.40 1 0
## tsquare_distance
## 1 2.2968748
## 2 0.1459679
## 3 2.2596886
## 4 2.9737754
## 5 0.7525852
## 6 2.4536331
# Dropping variables judged irrelevant to the model: "id", "name", "host_id",
# "host_name", "last_review", "latitude", "longitude", "neighbourhood_group".
# ("neighbourhood" itself is kept.)
nyc2 <- subset(
  nycraw,
  select = -c(id, name, host_id, host_name, last_review, latitude, longitude,
              neighbourhood_group)
)
head(nyc2)
## neighbourhood room_type price minimum_nights
## 1 Kensington Private room 149 1
## 2 Midtown Entire home/apt 225 1
## 3 Clinton Hill Entire home/apt 89 1
## 4 East Harlem Entire home/apt 80 10
## 5 Murray Hill Entire home/apt 200 3
## 6 Bedford-Stuyvesant Private room 60 45
## number_of_reviews reviews_per_month calculated_host_listings_count
## 1 9 0.21 6
## 2 45 0.38 2
## 3 270 4.64 1
## 4 9 0.10 1
## 5 74 0.59 1
## 6 49 0.40 1
## availability_365 tsquare_distance
## 1 365 2.2968748
## 2 355 0.1459679
## 3 194 2.2596886
## 4 0 2.9737754
## 5 129 0.7525852
## 6 0 2.4536331
# Remove listings with a price of 0, then listings with 0 available days.
nyc2 <- nyc2[nyc2$price != 0, ]
nyc2 <- nyc2[nyc2$availability_365 != 0, ]
invisible(view(nyc2))
# Checking for NAs: tabulate the missing-data pattern
md.pattern(nyc2)
## /\ /\
## { `---' }
## { O O }
## ==> V <== No need for mice. This data set is completely observed.
## \ \|/ /
## `-----'
## neighbourhood room_type price minimum_nights number_of_reviews
## 25487 1 1 1 1 1
## 0 0 0 0 0
## reviews_per_month calculated_host_listings_count availability_365
## 25487 1 1 1
## 0 0 0
## tsquare_distance
## 25487 1 0
## 0 0
# Row count before dropping NAs
nrow(nyc2)
## [1] 25487
# Drop any rows that contain NAs
nyc3 <- na.omit(nyc2)
# Confirming NA drop: the recorded row count is unchanged, so no rows were removed
nrow(nyc3)
## [1] 25487
# Zero-variance check via skim(): the recorded results below show no
# zero-variance variables, so all variables are left in. They also show the
# categorical variables (neighbourhood, room_type) stored as factors.
skim(nyc3)
## Skim summary statistics
## n obs: 25487
## n variables: 9
##
## ── Variable type:factor ──────────────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n n_unique
## neighbourhood 0 25487 25487 217
## room_type 0 25487 25487 3
## top_counts ordered
## Bed: 2172, Wil: 1739, Har: 1470, Bus: 1195 FALSE
## Ent: 13363, Pri: 11472, Sha: 652, NA: 0 FALSE
##
## ── Variable type:integer ─────────────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0
## availability_365 0 25487 25487 169.11 123.76 1
## calculated_host_listings_count 0 25487 25487 6.98 32.08 1
## minimum_nights 0 25487 25487 6.23 16.8 1
## number_of_reviews 0 25487 25487 38.94 54.8 1
## price 0 25487 25487 146.99 170.37 10
## p25 p50 p75 p100 hist
## 52 157 290 365 ▇▅▂▃▂▃▃▆
## 1 1 3 327 ▇▁▁▁▁▁▁▁
## 1 2 4 999 ▇▁▁▁▁▁▁▁
## 5 17 51 629 ▇▁▁▁▁▁▁▁
## 69 107 175 8500 ▇▁▁▁▁▁▁▁
##
## ── Variable type:numeric ─────────────────────────────────────────────────────────────────────────────────────────────────────────
## variable missing complete n mean sd p0 p25 p50 p75
## reviews_per_month 0 25487 25487 1.85 1.81 0.02 0.49 1.29 2.73
## tsquare_distance 0 25487 25487 3.44 3.07 0.016 1.48 2.69 4.26
## p100 hist
## 58.5 ▇▁▁▁▁▁▁▁
## 18.86 ▇▇▂▁▁▁▁▁
corrNYC <- nyc3
# Correlation table of the numeric variables (also opened in the viewer)
corrNYCTable <- corrNYC %>%
  keep(is.numeric) %>%
  cor %>%
  view
# Correlation plot: numeric variables against numeric variables
corrNYC %>%
  keep(is.numeric) %>%
  cor %>%
  corrplot(
    type = "upper",
    method = "square",
    order = "hclust",
    addCoef.col = "white",
    number.digits = 2,
    number.cex = 0.5,
    tl.srt = 45,
    tl.cex = 0.8
  )
invisible(view(corrNYCTable))
# Removing reviews_per_month due to its high correlation with number_of_reviews
nyc4 <- select(nyc3, -reviews_per_month)
summary(nyc4)
## neighbourhood room_type price
## Bedford-Stuyvesant: 2172 Entire home/apt:13363 Min. : 10
## Williamsburg : 1739 Private room :11472 1st Qu.: 69
## Harlem : 1470 Shared room : 652 Median : 107
## Bushwick : 1195 Mean : 147
## Hell's Kitchen : 1171 3rd Qu.: 175
## Upper East Side : 867 Max. :8500
## (Other) :16873
## minimum_nights number_of_reviews calculated_host_listings_count
## Min. : 1.000 Min. : 1.00 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 5.00 1st Qu.: 1.000
## Median : 2.000 Median : 17.00 Median : 1.000
## Mean : 6.233 Mean : 38.94 Mean : 6.981
## 3rd Qu.: 4.000 3rd Qu.: 51.00 3rd Qu.: 3.000
## Max. :999.000 Max. :629.00 Max. :327.000
##
## availability_365 tsquare_distance
## Min. : 1.0 Min. : 0.01632
## 1st Qu.: 52.0 1st Qu.: 1.47596
## Median :157.0 Median : 2.69428
## Mean :169.1 Mean : 3.44215
## 3rd Qu.:290.0 3rd Qu.: 4.25571
## Max. :365.0 Max. :18.85560
##
# Keep listings priced from 25 to 400 (inclusive) whose minimum stay is at
# most 365 nights.
nyc4 <- filter(nyc4, price >= 25, price <= 400)
nyc4 <- nyc4[nyc4$minimum_nights <= 365, ]
invisible(view(nyc4))
#nyc4 %>% pairs() No color model
# Scatterplot matrix of every variable, coloured by neighbourhood
pairs(nyc4, col = nyc4$neighbourhood)
# Log-price data set: append lprice = log(price) as the last column and drop
# the untransformed price.
log.nyc <- nyc4
log.nyc$lprice <- log(log.nyc$price)
log.nyc$price <- NULL
invisible(log.nyc)
# Scatterplot matrix on the log-price scale, coloured by neighbourhood
pairs(log.nyc, col = log.nyc$neighbourhood)
# Log-log data set: natural log of every numeric predictor, added alongside
# lprice. The column name "lavailablility" keeps its original spelling so that
# any later code referring to it continues to work.
log.indep.nyc <- log.nyc %>%
  mutate(
    lreviews = log(number_of_reviews),
    lnights = log(minimum_nights),
    llistings = log(calculated_host_listings_count),
    lavailablility = log(availability_365),
    ltsqr = log(tsquare_distance)
  )
invisible(log.indep.nyc)
# Drop the untransformed predictors
log.indep.nyc <- select(log.indep.nyc, -c("minimum_nights", "number_of_reviews", "calculated_host_listings_count", "availability_365", "tsquare_distance"))
# Checking for -Inf logged results
invisible(log.indep.nyc)
# Drop rows whose logged availability is infinite (log(0) is -Inf).
# is.infinite() tests the numeric value directly instead of comparing the
# column against the string "-Inf", which only worked through coercion.
log.indep.nyc <- log.indep.nyc[!is.infinite(log.indep.nyc$lavailablility), ]
invisible(log.indep.nyc)
# Scatterplot matrix of the log-log data, coloured by neighbourhood
pairs(log.indep.nyc, col = log.indep.nyc$neighbourhood)
# Binned data set: starting from nyc4, add a binned version of each numeric
# predictor via var.bin() and then drop the original columns.
# NOTE(review): var.bin() presumably groups each variable into the requested
# number of bins - confirm against the SamplingStrata documentation.
nyc.bins <- nyc4
nyc.bins$reviewsBin <- var.bin(nyc.bins$number_of_reviews, bins = 50)
nyc.bins$nightsBin <- var.bin(nyc.bins$minimum_nights, bins = 50)
nyc.bins$availBin <- var.bin(nyc.bins$availability_365, bins = 50)
nyc.bins$listBin <- var.bin(nyc.bins$calculated_host_listings_count, bins = 10)
nyc.bins$tsquBin <- var.bin(nyc.bins$tsquare_distance, bins = 20)
# Drop the unbinned predictors
nyc.bins <- select(nyc.bins,-c("minimum_nights", "number_of_reviews", "calculated_host_listings_count", "availability_365", "tsquare_distance"))
invisible(nyc.bins)
# Linear model of price on every remaining column of nyc.bins
nyc.bin.model <-lm(price~.,data=nyc.bins)
#nyc.bins %>% pairs() No color model
pairs(nyc.bins,col=nyc.bins$neighbourhood) #Color by neighborhood
# Price against the categorical predictors. plot() of a factor against a
# numeric vector draws one box per factor level.
# The heading is passed as `main`: `title` is not a graphical parameter, so
# it only produced warnings and the plots had no heading.
plot(nyc4$neighbourhood, nyc4$price, xlab = "Neighbourhood", ylab = "Price", main = "Price v Neighbourhood Correlation Check", col = c(7, 32, 52, 82, 107))
# One randomly sampled listing per neighbourhood. No seed is set, so the
# sample (and the plot below) differs from run to run.
nycNeighborhood <- ddply(nyc4, .(neighbourhood), function(x) x[sample(nrow(x), 1), ])
plot(nycNeighborhood$neighbourhood, nycNeighborhood$price, xlab = "Neighbourhood", ylab = "Price", main = "Price v Neighbourhood Correlation Check")
plot(nyc4$room_type, nyc4$price, xlab = "Room Type", ylab = "Price", main = "Price v Room Type Correlation Check", col = c(7, 32, 52))
# Additive linear model of log price on neighbourhood, room type and the
# numeric predictors.
nyc.model <- lm(
  lprice ~ neighbourhood + room_type + minimum_nights + number_of_reviews +
    calculated_host_listings_count + availability_365 + tsquare_distance,
  data = log.nyc
)
summary(nyc.model)
##
## Call:
## lm(formula = lprice ~ neighbourhood + room_type + minimum_nights +
## number_of_reviews + calculated_host_listings_count + availability_365 +
## tsquare_distance, data = log.nyc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.56065 -0.24179 -0.02474 0.21120 2.16671
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 5.29397507 0.07801513 67.858
## neighbourhoodArden Heights 0.09987198 0.22240715 0.449
## neighbourhoodArrochar -0.23255321 0.10638369 -2.186
## neighbourhoodArverne 0.68096923 0.08310209 8.194
## neighbourhoodAstoria -0.11001920 0.07026454 -1.566
## neighbourhoodBath Beach -0.42777654 0.12335347 -3.468
## neighbourhoodBattery Park City 0.13620085 0.12153289 1.121
## neighbourhoodBay Ridge -0.17395292 0.07984694 -2.179
## neighbourhoodBay Terrace 0.80798639 0.17769392 4.547
## neighbourhoodBaychester 0.09133318 0.16280354 0.561
## neighbourhoodBayside 0.50040781 0.09899653 5.055
## neighbourhoodBayswater 0.61573566 0.13648099 4.512
## neighbourhoodBedford-Stuyvesant -0.20501068 0.07025206 -2.918
## neighbourhoodBelle Harbor 0.80916632 0.17595985 4.599
## neighbourhoodBellerose 0.93671670 0.15871634 5.902
## neighbourhoodBelmont -0.06285772 0.11485584 -0.547
## neighbourhoodBensonhurst -0.44614043 0.08884249 -5.022
## neighbourhoodBergen Beach -0.24524651 0.14536608 -1.687
## neighbourhoodBoerum Hill -0.03857191 0.08500054 -0.454
## neighbourhoodBorough Park -0.47358248 0.08167786 -5.798
## neighbourhoodBreezy Point 1.27727475 0.26658042 4.791
## neighbourhoodBriarwood 0.27061558 0.08944621 3.025
## neighbourhoodBrighton Beach -0.24723574 0.08646053 -2.860
## neighbourhoodBronxdale -0.27859298 0.12383387 -2.250
## neighbourhoodBrooklyn Heights -0.00732640 0.08812413 -0.083
## neighbourhoodBrownsville -0.21869688 0.08734324 -2.504
## neighbourhoodBull's Head 0.57481364 0.37184696 1.546
## neighbourhoodBushwick -0.14982633 0.06868412 -2.181
## neighbourhoodCambria Heights 0.84997381 0.13401339 6.342
## neighbourhoodCanarsie -0.16104793 0.07347689 -2.192
## neighbourhoodCarroll Gardens -0.06361437 0.08196143 -0.776
## neighbourhoodCastle Hill -0.30095725 0.26659924 -1.129
## neighbourhoodCastleton Corners 0.34041077 0.26663628 1.277
## neighbourhoodChelsea 0.10701712 0.07744292 1.382
## neighbourhoodChinatown -0.00982561 0.07968895 -0.123
## neighbourhoodCity Island 0.46624325 0.11955657 3.900
## neighbourhoodCivic Center 0.18664760 0.12568212 1.485
## neighbourhoodClaremont Village -0.26318292 0.11334005 -2.322
## neighbourhoodClason Point 0.06637398 0.10978809 0.605
## neighbourhoodClifton -0.10760398 0.12413860 -0.867
## neighbourhoodClinton Hill -0.06202483 0.07626291 -0.813
## neighbourhoodCo-op City 0.41572918 0.37157745 1.119
## neighbourhoodCobble Hill 0.00564350 0.09121534 0.062
## neighbourhoodCollege Point 0.02802517 0.12038176 0.233
## neighbourhoodColumbia St -0.16941662 0.10704526 -1.583
## neighbourhoodConcord -0.33584926 0.09905695 -3.390
## neighbourhoodConcourse -0.35881350 0.09152397 -3.920
## neighbourhoodConcourse Village -0.35022239 0.10960180 -3.195
## neighbourhoodConey Island -0.13913722 0.13053772 -1.066
## neighbourhoodCorona -0.14471304 0.08180195 -1.769
## neighbourhoodCrown Heights -0.18375798 0.07141838 -2.573
## neighbourhoodCypress Hills -0.14906206 0.07436662 -2.004
## neighbourhoodDitmars Steinway -0.13840722 0.07247484 -1.910
## neighbourhoodDongan Hills -0.25209224 0.16274809 -1.549
## neighbourhoodDouglaston 0.62370932 0.15007817 4.156
## neighbourhoodDowntown Brooklyn 0.05680846 0.09950029 0.571
## neighbourhoodDUMBO 0.25088636 0.12627734 1.987
## neighbourhoodDyker Heights -0.31638170 0.13552877 -2.334
## neighbourhoodEast Elmhurst -0.07234601 0.07159869 -1.010
## neighbourhoodEast Flatbush -0.31140189 0.07182837 -4.335
## neighbourhoodEast Harlem -0.00418059 0.07211481 -0.058
## neighbourhoodEast Morrisania 0.06940718 0.13838055 0.502
## neighbourhoodEast New York -0.11171926 0.07055237 -1.583
## neighbourhoodEast Village -0.00588189 0.07732182 -0.076
## neighbourhoodEastchester 0.39413202 0.13281691 2.967
## neighbourhoodEdenwald 0.10073617 0.13825770 0.729
## neighbourhoodEdgemere 0.45928450 0.14752392 3.113
## neighbourhoodElmhurst -0.04586177 0.07154189 -0.641
## neighbourhoodEltingville 0.04928136 0.26709942 0.185
## neighbourhoodEmerson Hill -0.12533817 0.22091492 -0.567
## neighbourhoodFar Rockaway 0.51293264 0.11336312 4.525
## neighbourhoodFieldston -0.16600279 0.15312166 -1.084
## neighbourhoodFinancial District 0.25524041 0.07691610 3.318
## neighbourhoodFlatbush -0.36971376 0.07437810 -4.971
## neighbourhoodFlatiron District 0.22763395 0.10339020 2.202
## neighbourhoodFlatlands -0.20682051 0.08335050 -2.481
## neighbourhoodFlushing 0.29539692 0.06890875 4.287
## neighbourhoodFordham -0.16652913 0.08704222 -1.913
## neighbourhoodForest Hills 0.27143346 0.07643973 3.551
## neighbourhoodFort Greene -0.05824557 0.07785717 -0.748
## neighbourhoodFort Hamilton -0.23764506 0.09302927 -2.555
## neighbourhoodFresh Meadows 0.39764141 0.09916676 4.010
## neighbourhoodGlendale -0.23666011 0.08838692 -2.678
## neighbourhoodGowanus 0.01483654 0.08053105 0.184
## neighbourhoodGramercy -0.00140382 0.08254005 -0.017
## neighbourhoodGraniteville 0.30003749 0.37186462 0.807
## neighbourhoodGrant City -0.45547820 0.19396395 -2.348
## neighbourhoodGravesend -0.38418630 0.09634785 -3.987
## neighbourhoodGreat Kills 0.37189977 0.13318659 2.792
## neighbourhoodGreenpoint -0.05165686 0.07381696 -0.700
## neighbourhoodGreenwich Village 0.15246521 0.08091642 1.884
## neighbourhoodGrymes Hill 0.27831000 0.17596886 1.582
## neighbourhoodHarlem -0.09270577 0.07179948 -1.291
## neighbourhoodHell's Kitchen 0.06658374 0.07754466 0.859
## neighbourhoodHighbridge -0.26831268 0.10523732 -2.550
## neighbourhoodHollis 0.54822516 0.14117800 3.883
## neighbourhoodHolliswood 1.38307509 0.26823307 5.156
## neighbourhoodHoward Beach 0.28300020 0.12788561 2.213
## neighbourhoodHowland Hook 0.21152650 0.26736772 0.791
## neighbourhoodHuguenot 0.35335612 0.22196275 1.592
## neighbourhoodHunts Point -0.35405299 0.11496698 -3.080
## neighbourhoodInwood -0.17930922 0.07538338 -2.379
## neighbourhoodJackson Heights -0.05959317 0.07238098 -0.823
## neighbourhoodJamaica 0.43299155 0.07427419 5.830
## neighbourhoodJamaica Estates 0.47652848 0.11919496 3.998
## neighbourhoodJamaica Hills 0.76382679 0.17694090 4.317
## neighbourhoodKensington -0.43337179 0.08298369 -5.222
## neighbourhoodKew Gardens 0.20439973 0.10467297 1.953
## neighbourhoodKew Gardens Hills 0.30063359 0.11267107 2.668
## neighbourhoodKingsbridge -0.16574917 0.08600170 -1.927
## neighbourhoodKips Bay -0.06294762 0.08138423 -0.773
## neighbourhoodLaurelton 0.56760503 0.12303683 4.613
## neighbourhoodLighthouse Hill 0.46386216 0.26674128 1.739
## neighbourhoodLittle Italy -0.02633816 0.08869225 -0.297
## neighbourhoodLittle Neck 0.56431025 0.22518590 2.506
## neighbourhoodLong Island City -0.06806990 0.07357323 -0.925
## neighbourhoodLongwood -0.25730909 0.09011566 -2.855
## neighbourhoodLower East Side -0.01866631 0.07784732 -0.240
## neighbourhoodManhattan Beach -0.30401950 0.16423386 -1.851
## neighbourhoodMarble Hill 0.10155905 0.17659767 0.575
## neighbourhoodMariners Harbor 0.20906207 0.15344361 1.362
## neighbourhoodMaspeth -0.20199637 0.07848588 -2.574
## neighbourhoodMelrose -0.13567952 0.19500991 -0.696
## neighbourhoodMiddle Village -0.01050842 0.09792488 -0.107
## neighbourhoodMidland Beach -0.03755757 0.19399870 -0.194
## neighbourhoodMidtown 0.13637112 0.07758783 1.758
## neighbourhoodMidwood -0.37380059 0.08709251 -4.292
## neighbourhoodMill Basin 0.03554244 0.19468997 0.183
## neighbourhoodMorningside Heights -0.04083690 0.08174499 -0.500
## neighbourhoodMorris Heights -0.48426244 0.13376484 -3.620
## neighbourhoodMorris Park 0.05729539 0.12381834 0.463
## neighbourhoodMorrisania -0.17215617 0.16349160 -1.053
## neighbourhoodMott Haven -0.28525263 0.08637318 -3.303
## neighbourhoodMount Eden -0.51464895 0.26724219 -1.926
## neighbourhoodMount Hope -0.28228600 0.11545008 -2.445
## neighbourhoodMurray Hill 0.04985858 0.08060353 0.619
## neighbourhoodNavy Yard 0.19356895 0.17972430 1.077
## neighbourhoodNeponsit 0.90913355 0.22086458 4.116
## neighbourhoodNew Brighton 0.19804521 0.17597314 1.125
## neighbourhoodNew Dorp Beach -0.38802742 0.26656938 -1.456
## neighbourhoodNew Springville 0.23133183 0.17664842 1.310
## neighbourhoodNoHo 0.19601719 0.09970690 1.966
## neighbourhoodNolita 0.17857368 0.08396976 2.127
## neighbourhoodNorth Riverdale -0.37755865 0.17618941 -2.143
## neighbourhoodNorwood -0.12395241 0.10996453 -1.127
## neighbourhoodOakwood 0.15897217 0.19405151 0.819
## neighbourhoodOlinville -0.01247849 0.26667515 -0.047
## neighbourhoodOzone Park 0.03494678 0.08588990 0.407
## neighbourhoodPark Slope -0.02027421 0.07750808 -0.262
## neighbourhoodParkchester -0.22584648 0.09880091 -2.286
## neighbourhoodPelham Bay 0.22376378 0.12411923 1.803
## neighbourhoodPelham Gardens -0.16120645 0.10012988 -1.610
## neighbourhoodPort Morris -0.33120778 0.09388403 -3.528
## neighbourhoodPort Richmond -0.07532229 0.17607534 -0.428
## neighbourhoodPrince's Bay 0.33919691 0.37313543 0.909
## neighbourhoodProspect Heights -0.07839375 0.07865639 -0.997
## neighbourhoodProspect-Lefferts Gardens -0.30645995 0.07442523 -4.118
## neighbourhoodQueens Village 0.42745785 0.09236844 4.628
## neighbourhoodRandall Manor -0.03507587 0.11453640 -0.306
## neighbourhoodRed Hook -0.21088252 0.09011473 -2.340
## neighbourhoodRego Park 0.00196639 0.07772904 0.025
## neighbourhoodRichmond Hill 0.17148839 0.07839775 2.187
## neighbourhoodRichmondtown -0.26236675 0.37145885 -0.706
## neighbourhoodRidgewood -0.15562456 0.07136032 -2.181
## neighbourhoodRiverdale -0.02734692 0.19463010 -0.141
## neighbourhoodRockaway Beach 0.47908270 0.08876348 5.397
## neighbourhoodRoosevelt Island -0.02780919 0.09810800 -0.283
## neighbourhoodRosebank -0.05542293 0.16311375 -0.340
## neighbourhoodRosedale 0.61696146 0.09353426 6.596
## neighbourhoodRossville 0.20771147 0.37301040 0.557
## neighbourhoodSchuylerville 0.20848551 0.12807724 1.628
## neighbourhoodSea Gate -0.21239655 0.37223131 -0.571
## neighbourhoodSheepshead Bay -0.29633511 0.07808217 -3.795
## neighbourhoodShore Acres -0.59098644 0.26690913 -2.214
## neighbourhoodSilver Lake -0.36388803 0.37144089 -0.980
## neighbourhoodSoHo 0.21020883 0.08007299 2.625
## neighbourhoodSoundview -0.31080479 0.12787100 -2.431
## neighbourhoodSouth Beach 0.16900454 0.15275022 1.106
## neighbourhoodSouth Ozone Park 0.36039755 0.09063935 3.976
## neighbourhoodSouth Slope -0.09549537 0.07925784 -1.205
## neighbourhoodSpringfield Gardens 0.64738238 0.08374254 7.731
## neighbourhoodSpuyten Duyvil 0.16900438 0.22153244 0.763
## neighbourhoodSt. Albans 0.50093184 0.08598072 5.826
## neighbourhoodSt. George 0.00422551 0.09123080 0.046
## neighbourhoodStapleton -0.07394654 0.10181800 -0.726
## neighbourhoodStuyvesant Town 0.00114719 0.13403485 0.009
## neighbourhoodSunnyside -0.17555840 0.07291657 -2.408
## neighbourhoodSunset Park -0.33236749 0.07642401 -4.349
## neighbourhoodTheater District 0.15660269 0.08524855 1.837
## neighbourhoodThrogs Neck 0.30797460 0.10490571 2.936
## neighbourhoodTodt Hill -0.07923354 0.22084688 -0.359
## neighbourhoodTompkinsville -0.15580093 0.08979337 -1.735
## neighbourhoodTottenville 0.83540431 0.18145541 4.604
## neighbourhoodTremont -0.38630543 0.15307095 -2.524
## neighbourhoodTribeca 0.44374720 0.09064056 4.896
## neighbourhoodTwo Bridges -0.02179565 0.09307978 -0.234
## neighbourhoodUnionport -0.03153795 0.19396732 -0.163
## neighbourhoodUniversity Heights -0.32165755 0.12149019 -2.648
## neighbourhoodUpper East Side 0.03259793 0.07377120 0.442
## neighbourhoodUpper West Side 0.03505626 0.07635370 0.459
## neighbourhoodVan Nest -0.16375494 0.16272185 -1.006
## neighbourhoodVinegar Hill 0.06368466 0.12064051 0.528
## neighbourhoodWakefield -0.02045184 0.08889643 -0.230
## neighbourhoodWashington Heights -0.22382717 0.07168248 -3.122
## neighbourhoodWest Brighton -0.02249759 0.11201009 -0.201
## neighbourhoodWest Farms 0.25006931 0.26665649 0.938
## neighbourhoodWest Village 0.17456643 0.07738087 2.256
## neighbourhoodWestchester Square -0.16718585 0.15265050 -1.095
## neighbourhoodWesterleigh -0.03621837 0.26669131 -0.136
## neighbourhoodWhitestone 0.37701584 0.14505343 2.599
## neighbourhoodWilliamsbridge -0.02764262 0.09379034 -0.295
## neighbourhoodWilliamsburg 0.01165297 0.07244470 0.161
## neighbourhoodWillowbrook 0.87549876 0.37150376 2.357
## neighbourhoodWindsor Terrace -0.24014716 0.08353565 -2.875
## neighbourhoodWoodhaven -0.06630459 0.07940355 -0.835
## neighbourhoodWoodlawn -0.27003596 0.13800307 -1.957
## neighbourhoodWoodside -0.11392214 0.07403831 -1.539
## room_typePrivate room -0.68971493 0.00499512 -138.078
## room_typeShared room -1.13438114 0.01543741 -73.483
## minimum_nights -0.00480495 0.00017622 -27.267
## number_of_reviews -0.00044760 0.00004349 -10.292
## calculated_host_listings_count -0.00029200 0.00009529 -3.064
## availability_365 0.00044007 0.00001981 22.219
## tsquare_distance -0.07393171 0.00485827 -15.218
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## neighbourhoodArden Heights 0.653399
## neighbourhoodArrochar 0.028826 *
## neighbourhoodArverne 0.000000000000000264 ***
## neighbourhoodAstoria 0.117412
## neighbourhoodBath Beach 0.000525 ***
## neighbourhoodBattery Park City 0.262430
## neighbourhoodBay Ridge 0.029372 *
## neighbourhoodBay Terrace 0.000005466066201576 ***
## neighbourhoodBaychester 0.574801
## neighbourhoodBayside 0.000000433963238269 ***
## neighbourhoodBayswater 0.000006466826102711 ***
## neighbourhoodBedford-Stuyvesant 0.003524 **
## neighbourhoodBelle Harbor 0.000004275124927277 ***
## neighbourhoodBellerose 0.000000003642604284 ***
## neighbourhoodBelmont 0.584195
## neighbourhoodBensonhurst 0.000000515778980536 ***
## neighbourhoodBergen Beach 0.091598 .
## neighbourhoodBoerum Hill 0.649988
## neighbourhoodBorough Park 0.000000006787005322 ***
## neighbourhoodBreezy Point 0.000001666565680574 ***
## neighbourhoodBriarwood 0.002485 **
## neighbourhoodBrighton Beach 0.004246 **
## neighbourhoodBronxdale 0.024475 *
## neighbourhoodBrooklyn Heights 0.933743
## neighbourhoodBrownsville 0.012291 *
## neighbourhoodBull's Head 0.122158
## neighbourhoodBushwick 0.029165 *
## neighbourhoodCambria Heights 0.000000000230111765 ***
## neighbourhoodCanarsie 0.028402 *
## neighbourhoodCarroll Gardens 0.437668
## neighbourhoodCastle Hill 0.258962
## neighbourhoodCastleton Corners 0.201725
## neighbourhoodChelsea 0.167020
## neighbourhoodChinatown 0.901871
## neighbourhoodCity Island 0.000096542398726438 ***
## neighbourhoodCivic Center 0.137536
## neighbourhoodClaremont Village 0.020238 *
## neighbourhoodClason Point 0.545474
## neighbourhoodClifton 0.386057
## neighbourhoodClinton Hill 0.416052
## neighbourhoodCo-op City 0.263227
## neighbourhoodCobble Hill 0.950667
## neighbourhoodCollege Point 0.815917
## neighbourhoodColumbia St 0.113511
## neighbourhoodConcord 0.000699 ***
## neighbourhoodConcourse 0.000088632696018338 ***
## neighbourhoodConcourse Village 0.001398 **
## neighbourhoodConey Island 0.286490
## neighbourhoodCorona 0.076895 .
## neighbourhoodCrown Heights 0.010089 *
## neighbourhoodCypress Hills 0.045036 *
## neighbourhoodDitmars Steinway 0.056180 .
## neighbourhoodDongan Hills 0.121401
## neighbourhoodDouglaston 0.000032512275993055 ***
## neighbourhoodDowntown Brooklyn 0.568047
## neighbourhoodDUMBO 0.046957 *
## neighbourhoodDyker Heights 0.019582 *
## neighbourhoodEast Elmhurst 0.312296
## neighbourhoodEast Flatbush 0.000014610597310590 ***
## neighbourhoodEast Harlem 0.953772
## neighbourhoodEast Morrisania 0.615976
## neighbourhoodEast New York 0.113322
## neighbourhoodEast Village 0.939364
## neighbourhoodEastchester 0.003005 **
## neighbourhoodEdenwald 0.466246
## neighbourhoodEdgemere 0.001852 **
## neighbourhoodElmhurst 0.521498
## neighbourhoodEltingville 0.853618
## neighbourhoodEmerson Hill 0.570475
## neighbourhoodFar Rockaway 0.000006077113828277 ***
## neighbourhoodFieldston 0.278321
## neighbourhoodFinancial District 0.000907 ***
## neighbourhoodFlatbush 0.000000671532545171 ***
## neighbourhoodFlatiron District 0.027696 *
## neighbourhoodFlatlands 0.013096 *
## neighbourhoodFlushing 0.000018197596803125 ***
## neighbourhoodFordham 0.055734 .
## neighbourhoodForest Hills 0.000385 ***
## neighbourhoodFort Greene 0.454402
## neighbourhoodFort Hamilton 0.010639 *
## neighbourhoodFresh Meadows 0.000060945081874284 ***
## neighbourhoodGlendale 0.007421 **
## neighbourhoodGowanus 0.853832
## neighbourhoodGramercy 0.986431
## neighbourhoodGraniteville 0.419763
## neighbourhoodGrant City 0.018869 *
## neighbourhoodGravesend 0.000066970587129689 ***
## neighbourhoodGreat Kills 0.005237 **
## neighbourhoodGreenpoint 0.484061
## neighbourhoodGreenwich Village 0.059546 .
## neighbourhoodGrymes Hill 0.113757
## neighbourhoodHarlem 0.196655
## neighbourhoodHell's Kitchen 0.390542
## neighbourhoodHighbridge 0.010791 *
## neighbourhoodHollis 0.000103 ***
## neighbourhoodHolliswood 0.000000253926229845 ***
## neighbourhoodHoward Beach 0.026913 *
## neighbourhoodHowland Hook 0.428867
## neighbourhoodHuguenot 0.111406
## neighbourhoodHunts Point 0.002075 **
## neighbourhoodInwood 0.017385 *
## neighbourhoodJackson Heights 0.410331
## neighbourhoodJamaica 0.000000005624995484 ***
## neighbourhoodJamaica Estates 0.000064097939546765 ***
## neighbourhoodJamaica Hills 0.000015889934877878 ***
## neighbourhoodKensington 0.000000178097996278 ***
## neighbourhoodKew Gardens 0.050861 .
## neighbourhoodKew Gardens Hills 0.007630 **
## neighbourhoodKingsbridge 0.053957 .
## neighbourhoodKips Bay 0.439256
## neighbourhoodLaurelton 0.000003983579965643 ***
## neighbourhoodLighthouse Hill 0.082048 .
## neighbourhoodLittle Italy 0.766499
## neighbourhoodLittle Neck 0.012218 *
## neighbourhoodLong Island City 0.354871
## neighbourhoodLongwood 0.004303 **
## neighbourhoodLower East Side 0.810502
## neighbourhoodManhattan Beach 0.064162 .
## neighbourhoodMarble Hill 0.565238
## neighbourhoodMariners Harbor 0.173063
## neighbourhoodMaspeth 0.010069 *
## neighbourhoodMelrose 0.486588
## neighbourhoodMiddle Village 0.914543
## neighbourhoodMidland Beach 0.846493
## neighbourhoodMidtown 0.078822 .
## neighbourhoodMidwood 0.000017775984886379 ***
## neighbourhoodMill Basin 0.855145
## neighbourhoodMorningside Heights 0.617386
## neighbourhoodMorris Heights 0.000295 ***
## neighbourhoodMorris Park 0.643557
## neighbourhoodMorrisania 0.292353
## neighbourhoodMott Haven 0.000959 ***
## neighbourhoodMount Eden 0.054144 .
## neighbourhoodMount Hope 0.014489 *
## neighbourhoodMurray Hill 0.536208
## neighbourhoodNavy Yard 0.281476
## neighbourhoodNeponsit 0.000038635847207790 ***
## neighbourhoodNew Brighton 0.260419
## neighbourhoodNew Dorp Beach 0.145507
## neighbourhoodNew Springville 0.190357
## neighbourhoodNoHo 0.049318 *
## neighbourhoodNolita 0.033460 *
## neighbourhoodNorth Riverdale 0.032130 *
## neighbourhoodNorwood 0.259668
## neighbourhoodOakwood 0.412665
## neighbourhoodOlinville 0.962679
## neighbourhoodOzone Park 0.684101
## neighbourhoodPark Slope 0.793651
## neighbourhoodParkchester 0.022270 *
## neighbourhoodPelham Bay 0.071430 .
## neighbourhoodPelham Gardens 0.107417
## neighbourhoodPort Morris 0.000420 ***
## neighbourhoodPort Richmond 0.668812
## neighbourhoodPrince's Bay 0.363335
## neighbourhoodProspect Heights 0.318939
## neighbourhoodProspect-Lefferts Gardens 0.000038395384225151 ***
## neighbourhoodQueens Village 0.000003715710278657 ***
## neighbourhoodRandall Manor 0.759423
## neighbourhoodRed Hook 0.019284 *
## neighbourhoodRego Park 0.979817
## neighbourhoodRichmond Hill 0.028722 *
## neighbourhoodRichmondtown 0.479999
## neighbourhoodRidgewood 0.029206 *
## neighbourhoodRiverdale 0.888260
## neighbourhoodRockaway Beach 0.000000068285845369 ***
## neighbourhoodRoosevelt Island 0.776831
## neighbourhoodRosebank 0.734025
## neighbourhoodRosedale 0.000000000043077969 ***
## neighbourhoodRossville 0.577634
## neighbourhoodSchuylerville 0.103578
## neighbourhoodSea Gate 0.568274
## neighbourhoodSheepshead Bay 0.000148 ***
## neighbourhoodShore Acres 0.026825 *
## neighbourhoodSilver Lake 0.327261
## neighbourhoodSoHo 0.008665 **
## neighbourhoodSoundview 0.015081 *
## neighbourhoodSouth Beach 0.268560
## neighbourhoodSouth Ozone Park 0.000070236305106912 ***
## neighbourhoodSouth Slope 0.228265
## neighbourhoodSpringfield Gardens 0.000000000000011115 ***
## neighbourhoodSpuyten Duyvil 0.445538
## neighbourhoodSt. Albans 0.000000005745456556 ***
## neighbourhoodSt. George 0.963058
## neighbourhoodStapleton 0.467685
## neighbourhoodStuyvesant Town 0.993171
## neighbourhoodSunnyside 0.016062 *
## neighbourhoodSunset Park 0.000013732037801157 ***
## neighbourhoodTheater District 0.066220 .
## neighbourhoodThrogs Neck 0.003331 **
## neighbourhoodTodt Hill 0.719769
## neighbourhoodTompkinsville 0.082735 .
## neighbourhoodTottenville 0.000004167298214400 ***
## neighbourhoodTremont 0.011619 *
## neighbourhoodTribeca 0.000000985940162644 ***
## neighbourhoodTwo Bridges 0.814862
## neighbourhoodUnionport 0.870839
## neighbourhoodUniversity Heights 0.008112 **
## neighbourhoodUpper East Side 0.658581
## neighbourhoodUpper West Side 0.646145
## neighbourhoodVan Nest 0.314258
## neighbourhoodVinegar Hill 0.597582
## neighbourhoodWakefield 0.818044
## neighbourhoodWashington Heights 0.001795 **
## neighbourhoodWest Brighton 0.840815
## neighbourhoodWest Farms 0.348359
## neighbourhoodWest Village 0.024083 *
## neighbourhoodWestchester Square 0.273431
## neighbourhoodWesterleigh 0.891975
## neighbourhoodWhitestone 0.009351 **
## neighbourhoodWilliamsbridge 0.768204
## neighbourhoodWilliamsburg 0.872210
## neighbourhoodWillowbrook 0.018449 *
## neighbourhoodWindsor Terrace 0.004047 **
## neighbourhoodWoodhaven 0.403707
## neighbourhoodWoodlawn 0.050390 .
## neighbourhoodWoodside 0.123893
## room_typePrivate room < 0.0000000000000002 ***
## room_typeShared room < 0.0000000000000002 ***
## minimum_nights < 0.0000000000000002 ***
## number_of_reviews < 0.0000000000000002 ***
## calculated_host_listings_count 0.002185 **
## availability_365 < 0.0000000000000002 ***
## tsquare_distance < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3657 on 24312 degrees of freedom
## Multiple R-squared: 0.6269, Adjusted R-squared: 0.6235
## F-statistic: 183.2 on 223 and 24312 DF, p-value: < 0.00000000000000022
# Surfacing only the significant terms of nyc.model (neighbourhoods and any
# other predictor whose p-value clears the threshold).
# tidy() (broom) converts the fitted model into a table with one row per term,
# including a p.value column.
nyc.model2 <- tidy(nyc.model)
# Favour fixed over scientific notation when the p-values are displayed.
options(scipen = 999)
# Keep terms significant at the 5% level. which() is used instead of a bare
# logical index so that a term with an NA p-value (e.g. a coefficient that
# could not be estimated) is dropped rather than turned into an all-NA row.
nyc.modeldf <- nyc.model2[which(nyc.model2$p.value < 0.05), ]
# Open the filtered table in the viewer without echoing it to the console.
invisible(view(nyc.modeldf))
#Would reducing the data set to just the significant values help?
#To be built and provided by Reagan: this should include about four intuitive models, and we take the one with the best adjusted R-squared
# Forward selection (leaps::regsubsets) on the log-price response.
# Candidate terms: the neighbourhood and room_type main effects and their
# interaction, the four numeric listing predictors, their pairwise
# interactions, and the interaction of each numeric predictor with
# tsquare_distance. nvmax = 20 caps the subset size that is searched.
# NOTE(review): tsquare_distance enters only through interactions here; it has
# no main-effect term in this formula -- confirm that this is intended.
nyc.fwd <- regsubsets(
  lprice ~ neighbourhood + room_type + neighbourhood:room_type +
    minimum_nights + number_of_reviews +
    calculated_host_listings_count + availability_365 +
    minimum_nights:number_of_reviews +
    minimum_nights:calculated_host_listings_count +
    number_of_reviews:calculated_host_listings_count +
    availability_365:minimum_nights +
    availability_365:number_of_reviews +
    availability_365:calculated_host_listings_count +
    tsquare_distance:minimum_nights +
    tsquare_distance:number_of_reviews +
    tsquare_distance:calculated_host_listings_count +
    tsquare_distance:availability_365,
  method = "forward",
  data = log.nyc,
  nvmax = 20
)
## Reordering variables and trying again:
# Adjusted R-squared for each subset size found by the forward search.
summary(nyc.fwd)[["adjr2"]]
## [1] 0.3636217 0.4584526 0.4814209 0.5021979 0.5114939 0.5196637 0.5260967
## [8] 0.5317874 0.5362072 0.5406042 0.5496958 0.5525292 0.5553844 0.5582046
## [15] 0.5607291 0.5632855 0.5654550 0.5677430 0.5701914 0.5726802 0.5750815
#predict.regsubsets =function (object , newdata ,id ,...){
# form=as.formula (object$call [[2]])
# mat=model.matrix(form ,newdata )
# coefi=coef(object ,id=id)
# xvars=names(coefi)
# mat[,xvars]%*%coefi
#}
#testASE<-c()
#Note: the index runs to 20 because that is the nvmax set in regsubsets
#for (i in 1:20){
# predictions<-predict.regsubsets(object=reg.fwd,newdata=test,id=i)
# testASE[i]<-mean((log(test$AvgWinnings)-predictions)^2)
#}
#par(mfrow=c(1,1))
#plot(1:20,testASE,type="l",xlab="# of predictors",ylab="test vs train ASE",ylim=c(0.2,0.8))
#index<-which(testASE==min(testASE))
#points(index,testASE[index],col="red",pch=10)
#rss<-summary(reg.fwd)$rss
#lines(1:20,rss/100,lty=3,col="blue") #Dividing by 100 since ASE=RSS/sample size
#coef(reg.final,XXXX)
#final.model<-lm(log(AvgWinnings)~Greens+AvgPutts+Save,data=golf)
#summary(final.model)
# Backward elimination (leaps::regsubsets) on the log-price response.
# Candidate terms: the neighbourhood and room_type main effects and their
# interaction, the four numeric listing predictors, their pairwise
# interactions, and the interaction of each numeric predictor with
# tsquare_distance. nvmax = 20 caps the subset size that is searched.
# NOTE(review): tsquare_distance enters only through interactions here; it has
# no main-effect term in this formula -- confirm that this is intended.
nyc.bck <- regsubsets(
  lprice ~ neighbourhood + room_type + neighbourhood:room_type +
    minimum_nights + number_of_reviews +
    calculated_host_listings_count + availability_365 +
    minimum_nights:number_of_reviews +
    minimum_nights:calculated_host_listings_count +
    number_of_reviews:calculated_host_listings_count +
    availability_365:minimum_nights +
    availability_365:number_of_reviews +
    availability_365:calculated_host_listings_count +
    tsquare_distance:minimum_nights +
    tsquare_distance:number_of_reviews +
    tsquare_distance:calculated_host_listings_count +
    tsquare_distance:availability_365,
  method = "backward",
  data = log.nyc,
  nvmax = 20
)
## Reordering variables and trying again:
# Adjusted R-squared for each subset size found by the backward search.
summary(nyc.bck)[["adjr2"]]
## [1] 0.3636217 0.4584526 0.4814209 0.5021979 0.5114939 0.5183706 0.5241404
## [8] 0.5286630 0.5333358 0.5378035 0.5423879 0.5472786 0.5521801 0.5566577
## [15] 0.5604016 0.5637242 0.5665884 0.5693934 0.5721132 0.5746263 0.5770165
#summary(nyc.bck)$rss
#summary(nyc.bck)$bic
# Sequential replacement search (method = "seqrep", leaps::regsubsets) on the
# log-price response.
# Candidate terms: the neighbourhood and room_type main effects and their
# interaction, the four numeric listing predictors, their pairwise
# interactions, and the interaction of each numeric predictor with
# tsquare_distance. nvmax = 20 caps the subset size that is searched.
# NOTE(review): tsquare_distance enters only through interactions here; it has
# no main-effect term in this formula -- confirm that this is intended.
nyc.seq <- regsubsets(
  lprice ~ neighbourhood + room_type + neighbourhood:room_type +
    minimum_nights + number_of_reviews +
    calculated_host_listings_count + availability_365 +
    minimum_nights:number_of_reviews +
    minimum_nights:calculated_host_listings_count +
    number_of_reviews:calculated_host_listings_count +
    availability_365:minimum_nights +
    availability_365:number_of_reviews +
    availability_365:calculated_host_listings_count +
    tsquare_distance:minimum_nights +
    tsquare_distance:number_of_reviews +
    tsquare_distance:calculated_host_listings_count +
    tsquare_distance:availability_365,
  method = "seqrep",
  data = log.nyc,
  nvmax = 20
)
## Reordering variables and trying again:
# Adjusted R-squared for each subset size found by the sequential search.
summary(nyc.seq)[["adjr2"]]
## [1] 0.3636217 0.4584526 0.4814209 0.5021979 0.5114939 0.5196637 0.5260967
## [8] 0.5317874 0.5362072 0.5488486 0.5517418 0.5546502 0.5574035 0.5609530
## [15] 0.5642190 0.5673411 0.5697029 0.5721765 0.5746351 0.5769443 0.5792610
#summary(nyc.seq)$rss
#summary(nyc.seq)$bic
#Because the exhaustive method is too large to run, we are commenting out this model and moving forward with selection methods that will run
#nyc.exh = regsubsets(lprice~neighbourhood + room_type + neighbourhood:room_type + minimum_nights + number_of_reviews + calculated_host_listings_count + availability_365 + minimum_nights:number_of_reviews + minimum_nights:calculated_host_listings_count + number_of_reviews:calculated_host_listings_count + availability_365:minimum_nights + availability_365:number_of_reviews + availability_365:calculated_host_listings_count + tsquare_distance:minimum_nights + tsquare_distance:number_of_reviews + tsquare_distance:calculated_host_listings_count + tsquare_distance:availability_365, data=log.nyc, nvmax=20)
#summary(nyc.exh)$adjr2
#summary(nyc.exh)$rss
#summary(nyc.exh)$bic
# Diagnostics on the raw price scale: regress price on every other column of
# nyc4 and draw the lm diagnostic plots in a 2 x 2 grid.
full.model <- lm(price ~ ., data = nyc4)
par(mfrow = c(2, 2))
plot(full.model)
# Same diagnostics for the lprice response, regressed on every other column
# of log.nyc.
log.depend.model <- lm(lprice ~ ., data = log.nyc)
par(mfrow = c(2, 2))
plot(log.depend.model)
# Collinearity check: take the third column of the vif() table and square it.
# NOTE(review): assumes car::vif() returns the generalised-VIF matrix here, in
# which case column 3 is GVIF^(1/(2*Df)) and its square is comparable to an
# ordinary VIF -- confirm against the installed car version.
vif(log.depend.model)[, 3]^2
## neighbourhood room_type
## 1.020103 1.080610
## minimum_nights number_of_reviews
## 1.083880 1.061315
## calculated_host_listings_count availability_365
## 1.547333 1.101061
## tsquare_distance
## 41.098477